# https://satijalab.org/signac/articles/pbmc_multiomic.html
counts <- Read10X_h5("filtered_feature_bc_matrix.h5")
fragpath <- "atac_fragments.tsv.gz"
# Get gene annotations for HG38
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
genome(annotation) <- "hg38"
seqlevelsStyle(annotation) <- "UCSC"
# Create a Seurat object containing the RNA data
npm1 <- CreateSeuratObject(
counts = counts$`Gene Expression`,
assay = "RNA"
)
# npm1[["percent.mt"]] <- PercentageFeatureSet(npm1, pattern = "^MT-")
#
# # create ATAC assay and add it to the object
#
# npm1[["ATAC"]] <- CreateChromatinAssay(
# counts = counts$Peaks,
# sep = c(":", "-"),
# fragments = fragpath,
# annotation = annotation
#
# )
Violin plot of RNA counts
DefaultAssay(npm1) <- "RNA"
VlnPlot(
object = npm1,
features = c("nCount_RNA"),
ncol = 4,
pt.size = 0
)
Discard low count genes and display subsequent violin plot.
npm1 <- subset(
x = npm1,
subset = nCount_RNA < 7500
)
VlnPlot(
object = npm1,
features = c("nCount_RNA"),
ncol = 4,
pt.size = 0
)
Dimensionality reduction: SC transform and PCA
#npm1 <- SCTransform(npm1) %>% RunPCA() %>% RunUMAP(dims = 1:50, reduction.name = 'umap.rna', reduction.key = 'rnaUMAP_')
npm1 <- SCTransform(npm1)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|= | 2%
|
|=== | 4%
|
|==== | 6%
|
|===== | 8%
|
|======= | 9%
|
|======== | 11%
|
|========= | 13%
|
|=========== | 15%
|
|============ | 17%
|
|============= | 19%
|
|=============== | 21%
|
|================ | 23%
|
|================= | 25%
|
|================== | 26%
|
|==================== | 28%
|
|===================== | 30%
|
|====================== | 32%
|
|======================== | 34%
|
|========================= | 36%
|
|========================== | 38%
|
|============================ | 40%
|
|============================= | 42%
|
|============================== | 43%
|
|================================ | 45%
|
|================================= | 47%
|
|================================== | 49%
|
|==================================== | 51%
|
|===================================== | 53%
|
|====================================== | 55%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================== | 60%
|
|============================================ | 62%
|
|============================================= | 64%
|
|============================================== | 66%
|
|================================================ | 68%
|
|================================================= | 70%
|
|================================================== | 72%
|
|==================================================== | 74%
|
|===================================================== | 75%
|
|====================================================== | 77%
|
|======================================================= | 79%
|
|========================================================= | 81%
|
|========================================================== | 83%
|
|=========================================================== | 85%
|
|============================================================= | 87%
|
|============================================================== | 89%
|
|=============================================================== | 91%
|
|================================================================= | 92%
|
|================================================================== | 94%
|
|=================================================================== | 96%
|
|===================================================================== | 98%
|
|======================================================================| 100%
##
|
| | 0%
|
|= | 2%
|
|=== | 4%
|
|==== | 6%
|
|===== | 8%
|
|======= | 9%
|
|======== | 11%
|
|========= | 13%
|
|=========== | 15%
|
|============ | 17%
|
|============= | 19%
|
|=============== | 21%
|
|================ | 23%
|
|================= | 25%
|
|================== | 26%
|
|==================== | 28%
|
|===================== | 30%
|
|====================== | 32%
|
|======================== | 34%
|
|========================= | 36%
|
|========================== | 38%
|
|============================ | 40%
|
|============================= | 42%
|
|============================== | 43%
|
|================================ | 45%
|
|================================= | 47%
|
|================================== | 49%
|
|==================================== | 51%
|
|===================================== | 53%
|
|====================================== | 55%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================== | 60%
|
|============================================ | 62%
|
|============================================= | 64%
|
|============================================== | 66%
|
|================================================ | 68%
|
|================================================= | 70%
|
|================================================== | 72%
|
|==================================================== | 74%
|
|===================================================== | 75%
|
|====================================================== | 77%
|
|======================================================= | 79%
|
|========================================================= | 81%
|
|========================================================== | 83%
|
|=========================================================== | 85%
|
|============================================================= | 87%
|
|============================================================== | 89%
|
|=============================================================== | 91%
|
|================================================================= | 92%
|
|================================================================== | 94%
|
|=================================================================== | 96%
|
|===================================================================== | 98%
|
|======================================================================| 100%
npm1 <- RunPCA(npm1)
Add cell identity status from Seurat object to NPM1 Seurat metadata.
reference <- readRDS("~/Downloads/namlab/NPM1_seurat/NPM1_seurat.rds")
npm1 <- AddMetaData(
object = npm1,
metadata = reference@meta.data %>% select(Cell.Ident_Mutation.Status) %>% filter(rownames(.) %in% BiocGenerics::intersect(rownames(npm1@meta.data), rownames(reference@meta.data))))
Idents(npm1) <- "Cell.Ident_Mutation.Status"
Find DEGs using FindMarkers(). Convert gene symbols to ENTREZ IDs and add to dataframe.
DefaultAssay(npm1) <- "SCT"
stem_cell_markers_1 <- FindMarkers(npm1, ident.1 = "HSC1_MUT", ident.2 = "HSC1_WT", only.pos = FALSE, logfc.threshold = 0) %>% arrange(desc(avg_log2FC))
stem_cell_markers_1$entrez = mapIds(org.Hs.eg.db, rownames(stem_cell_markers_1), 'ENTREZID', 'SYMBOL')
stem_cell_markers_1 = na.omit(stem_cell_markers_1)
Split DEG list into upregulated and downregulated genes.
upreg = subset(stem_cell_markers_1, subset = avg_log2FC > 0.25 & p_val_adj < 0.01)
downreg = subset(stem_cell_markers_1, subset = avg_log2FC < -0.25 & p_val_adj < 0.01)
Create sorted list of upregulated and downregulated genes.
#upreg_list <- sign(upreg$avg_log2FC)*(-log10(upreg$p_val_adj))
upreg_list <- upreg$avg_log2FC
names(upreg_list) <- rownames(upreg)
upreg_list <- upreg_list[na.exclude(names(upreg_list))]
upreg_list <- sort(upreg_list, decreasing = T)
#downreg_list <- sign(downreg$avg_log2FC)*(-log10(downreg$p_val_adj))
downreg_list <- downreg$avg_log2FC
names(downreg_list) <- rownames(downreg)
downreg_list <- downreg_list[na.exclude(names(downreg_list))]
downreg_list <- sort(downreg_list, decreasing = T)
downreg_list = replace(downreg_list, c(which(downreg_list %in% -Inf)),-(.Machine$double.xmax/100))
#full_list = sign(stem_cell_markers_1$avg_log2FC)*(-log10(stem_cell_markers_1$p_val_adj))
full_list = stem_cell_markers_1$avg_log2FC
names(full_list) <- rownames(stem_cell_markers_1)
full_list <- full_list[na.exclude(names(full_list))]
full_list <- sort(full_list, decreasing = T)
full_list = replace(full_list, c(which(full_list %in% -Inf)),-(.Machine$double.xmax/100))
Name2 = "HSC1_MUT vs. HSC1_WT"
stem_cell_markers_1$threshold <- as.factor(ifelse(stem_cell_markers_1$p_val_adj < 0.05 & abs(stem_cell_markers_1$avg_log2FC) >= 0.25, ifelse(stem_cell_markers_1$avg_log2FC> 0.25 ,'Up','Down'),'NoSignifi'))
ggplot(data=stem_cell_markers_1, aes(x=avg_log2FC, y=-log10(p_val_adj), colour=threshold)) +
geom_point(alpha=1, size=1.5) +
scale_color_manual(values=c("green", "grey", "red")) +
xlim(c(-4.5, 4.5)) +
geom_vline(xintercept=c(-.25, .25), lty=4,col="black",lwd=0.8) +
geom_hline(yintercept=-log10(0.05), lty=4,col="black",lwd=0.8) +
annotate("text", x=c(-1.2, 1.2), y=1.8, label=c("-1", "1")) +
annotate("text", x=-4, y=1.8, label="-log10(0.05)") +
labs(x="log2(fold change)", y="-log10 (p-value)", title=Name2) +
theme(plot.title=element_text(hjust=0.5), legend.position="right", legend.title=element_blank(
))
GO.title <- paste(Name2,"GO", collapse = " ")
KEGG.title <- paste(Name2,"KEGG", collapse = " ")
gostres <- gost(query = rownames(upreg), organism = "hsapiens", custom_bg = rownames(stem_cell_markers_1))
#gostplot(gostres, capped = FALSE, interactive = TRUE)
gostres <- gost(query = rownames(downreg), organism = "hsapiens", custom_bg = rownames(stem_cell_markers_1))
gostplot(gostres, capped = FALSE, interactive = TRUE)
Perform GSEA on up- and downregulated genes.
# ego <- enrichGO(gene = names(full_list),
# universe = rownames(stem_cell_markers_1),
# keyType = "SYMBOL",
# OrgDb = org.Hs.eg.db,
# ont = "BP",
# pAdjustMethod = "BH",
# pvalueCutoff = 0.05,
# readable = TRUE)
#
# head(ego)
gse <- gseGO(geneList=full_list,
ont ="ALL",
keyType = "SYMBOL",
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
verbose = TRUE,
OrgDb = org.Hs.eg.db,
pAdjustMethod = "none")
require(DOSE)
dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)
ridgeplot(gse) + labs(x = "enrichment distribution")
#gseaplot(gse, by = "all", title = gse$Description[3], geneSetID = 3)
gseaplot2(gse, geneSetID=1:10)
Create gseKEGG objects
kegg_organism = "hsa"
names(full_list) = mapIds(org.Hs.eg.db, names(full_list), 'ENTREZID', 'SYMBOL')
# names(upreg_list) = mapIds(org.Hs.eg.db, names(upreg_list), 'ENTREZID', 'SYMBOL')
# names(downreg_list) = mapIds(org.Hs.eg.db, names(downreg_list), 'ENTREZID', 'SYMBOL')
kk2 <- gseKEGG(geneList = full_list,
organism = kegg_organism,
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "none",
keyType = "ncbi-geneid")
dotplot(kk2, showCategory = 10, title = "Enriched Pathways" , split=".sign") + facet_grid(.~.sign)
#all_gene_sets = msigdbr(species = "Homo sapiens")
h_gene_sets = msigdbr(species = "human", category = "H")
pathwaysH = split(x = h_gene_sets$entrez_gene, f = h_gene_sets$gs_name)
#names(full_list) = mapIds(org.Hs.eg.db, names(full_list), 'ENTREZID', 'SYMBOL')
fgseaRes <- fgseaMultilevel(pathways=pathwaysH, stats=full_list)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.42% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fgseaResTidy <- fgseaRes %>%
as_tibble() %>%
arrange(desc(NES))
fgseaResTidy_sig = subset(fgseaResTidy, subset = padj < 0.05)
ggplot(fgseaResTidy_sig, aes(reorder(pathway, NES), NES)) +
geom_col(aes(fill=padj<0.05)) +
coord_flip() +
labs(x="Pathway", y="Normalized Enrichment Score",
title="Hallmark pathways NES from GSEA") +
theme_minimal()
#all_gene_sets = msigdbr(species = "Homo sapiens")
b_gene_sets = msigdbr(species = "human", category = "C2", subcategory = "CP:BIOCARTA")
pathwaysB = split(x = b_gene_sets$entrez_gene, f = b_gene_sets$gs_name)
#names(full_list) = mapIds(org.Hs.eg.db, names(full_list), 'ENTREZID', 'SYMBOL')
fgseaRes <- fgseaMultilevel(pathways=pathwaysB, stats=full_list)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.42% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fgseaResTidy <- fgseaRes %>%
as_tibble() %>%
arrange(desc(NES))
fgseaResTidy_sig = subset(fgseaResTidy, subset = padj < 0.05)
ggplot(fgseaResTidy_sig, aes(reorder(pathway, NES), NES)) +
geom_col(aes(fill=padj<0.05)) +
coord_flip() +
labs(x="Pathway", y="Normalized Enrichment Score",
title="Hallmark pathways NES from GSEA") +
theme_minimal()
#all_gene_sets = msigdbr(species = "Homo sapiens")
c_gene_sets = msigdbr(species = "human", category = "C2", subcategory = "CGP")
pathwaysC = split(x = c_gene_sets$entrez_gene, f = c_gene_sets$gs_name)
#names(full_list) = mapIds(org.Hs.eg.db, names(full_list), 'ENTREZID', 'SYMBOL')
fgseaRes <- fgseaMultilevel(pathways=pathwaysC, stats=full_list)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.42% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fgseaResTidy <- fgseaRes %>%
as_tibble() %>%
arrange(desc(NES))
fgseaResTidy_sig = subset(fgseaResTidy, subset = padj < 0.05)
ggplot(head(fgseaResTidy_sig, 30), aes(reorder(pathway, NES), NES)) +
geom_col(aes(fill=padj<0.05)) +
coord_flip() +
labs(x="Pathway", y="Normalized Enrichment Score",
title="Hallmark pathways NES from GSEA") +
theme_minimal()